# -*- coding: utf-8 -*-
from scrapy_redis.spiders import RedisSpider
from scrapy.http import Request
import os
import pymysql
import time


class SaveposterSpider(RedisSpider):
    """Download the poster image of every movie stored in the database.

    Each response handed to ``parse`` triggers a full pass over the
    ``t_movie`` table: every row's poster URL is rewritten to its large-size
    variant and scheduled for download, and each downloaded image is written
    to ``base_path`` as ``<movie id>.jpg``.
    """
    name = 'savePoster'
    allowed_domains = ['www.douban.com']

    # Throttle poster downloads through Scrapy's own scheduler. Calling
    # time.sleep() inside a callback would block the Twisted reactor and
    # stall every in-flight download, not just this spider's next request.
    custom_settings = {'DOWNLOAD_DELAY': 1}

    def __init__(self, *args, **kwargs):
        """Open the MySQL connection and set the poster output directory.

        ``*args`` / ``**kwargs`` are forwarded to ``RedisSpider`` so that
        spider arguments supplied by the crawler (e.g. ``-a key=value``) do
        not raise ``TypeError``.
        """
        super(SaveposterSpider, self).__init__(*args, **kwargs)
        # NOTE(review): credentials are hard-coded; consider moving them to
        # the project settings.
        self.conn = pymysql.connect(host='127.0.0.1', user='root', password='123456', db='test',
                                    charset='utf8')
        self.cursor = self.conn.cursor()
        self.base_path = r'E:\project\poster'  # directory the posters are saved to

    @staticmethod
    def _to_large_url(url):
        """Return ``url`` with its third-from-last path segment replaced by
        ``l_ratio_poster``, or ``None`` when ``url`` is empty or too short."""
        if not url:
            return None
        segments = url.split('/')
        if len(segments) < 3:
            return None
        segments[-3] = 'l_ratio_poster'
        return '/'.join(segments)

    def parse(self, response):
        """Yield one large-poster download request per row of ``t_movie``.

        Rows whose ``url`` is missing or malformed are logged and skipped so
        that a single bad record does not abort the whole pass.
        """
        sql = "select id, url from t_movie"
        self.cursor.execute(sql)

        for movie_id, url in self.cursor.fetchall():
            large_url = self._to_large_url(url)
            if large_url is None:
                self.logger.warning(
                    'Skipping movie %s: unusable poster url %r', movie_id, url)
                continue

            # dont_filter=True: a repeated pass must re-request the posters
            # instead of having them dropped by the duplicate filter.
            yield Request(
                url=large_url,
                callback=self.download_image,
                meta={'order': str(movie_id)},
                dont_filter=True,
            )

    def download_image(self, response):
        """Write the downloaded poster to ``<base_path>/<movie id>.jpg``."""
        # Create the target directory on first use so the write cannot fail
        # just because the folder has not been created yet.
        if not os.path.isdir(self.base_path):
            os.makedirs(self.base_path)
        image_name = os.path.join(self.base_path, response.meta['order'] + '.jpg')
        with open(image_name, 'wb') as f:
            f.write(response.body)

    def closed(self, reason):
        """Release the MySQL cursor and connection when the spider closes."""
        try:
            self.cursor.close()
        finally:
            self.conn.close()
